import matplotlib.pyplot as plt
import pandas as pd
# Load the travel-note spreadsheet. SimHei is set as the sans-serif font,
# presumably so the Chinese axis labels and titles render instead of boxes.
df = pd.read_excel('旅游网站精华游记数据_预处理.xlsx')
plt.rcParams['font.sans-serif'] = 'SimHei'

# Line chart: number of rows per value of the '月份' (month) column.
df_month = df.groupby('月份').size()
x = df_month.index

fig_month, ax_month = plt.subplots(figsize=(10, 5))
ax_month.plot(x, df_month, color=(0.894, 0, 0.498))
ax_month.set_xticks(range(1, 13))
ax_month.set_xlabel('月份')
ax_month.set_ylabel('旅游次数')
ax_month.set_title('每月游客旅游次数折线图')

# Annotate every data point with its count, centred horizontally on the point.
for month, trips in df_month.items():
    ax_month.text(month, trips, '%d' % trips, ha='center')
plt.show()

# Two vertically stacked histograms on one figure: one for the '天数' (days)
# column and one for the '人均消费（元）' (per-person spend) column.
plt.figure(figsize=(10, 9))

# (source column, x-axis label, subplot title) for each histogram, top to bottom.
hist_specs = (
    ('天数', '天数', '按天数统计旅游次数直方图'),
    ('人均消费（元）', '人均消费/元', '按人均消费统计旅游次数直方图'),
)
for position, (column, x_label, chart_title) in enumerate(hist_specs, start=1):
    plt.subplot(2, 1, position)
    plt.hist(df[column], color=(0.894, 0, 0.498), edgecolor='k')
    plt.xlabel(x_label)
    plt.ylabel('旅游次数')
    plt.title(chart_title)
plt.show()

# Pie chart of the five most frequent travel tags.
# Each non-empty '旅行标签' cell is split on whitespace into individual tags;
# rows with no tag value are dropped first.
data_label = df['旅行标签'].dropna()
label = data_label.str.split(expand=False)

# Flatten the per-row tag lists into one flat list of tags.
label_list = [tag for tags in label for tag in tags]

# One row per tag occurrence; counting the helper column '次数' per tag gives
# each tag's frequency.
df_label = pd.DataFrame(label_list, columns=['标签'])
df_label['次数'] = 1
# Fixed: the method is `sort_values` (the original `sort_balues` raised
# AttributeError). Keep only the five most frequent tags.
df_label_count = (
    df_label.groupby('标签')
    .agg('count')
    .sort_values(by='次数', ascending=False)
    .head(5)
)

# The percentages shown are shares among these top five tags only, because
# only their counts are passed to the pie.
plt.figure()
plt.pie(df_label_count['次数'], labels=df_label_count.index, autopct='%.2f%%')
plt.title('旅客旅游方式饼图')
plt.show()